#ifndef _IFFT_IL_SOURCE_H_
#define _IFFT_IL_SOURCE_H_

namespace amdspl
{
    namespace fft
    {

// AMD IL compute-shader source (il_cs_2_0, 64 threads per group) for an
// 8-point inverse FFT.  Each thread computes its base index as
// vAbsTidFlat0.x * 8, loads 8 values from g[], runs the 8-point butterfly
// network, divides every value by 8.0 and stores the 8 results back to g[].
//
// The arithmetic in func 0 treats each register as two packed complex
// numbers: (.x, .y) and (.z, .w) are (real, imaginary) pairs.
//
// Register roles as used by the IL below:
//   r70.x       base index into g[] for this thread
//   r400..r407  the 8 working values
//   r40, r41    operand/result and multiplier for func 0
//   r50, r51    operands/results for func 2
//   r60..r63    operands/results for func 3
//   r100, r101  scratch
static const char* _ifft8_tomo_fft_source_ = 
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
// l0: integer constants (0, 1, 0xFFFFFFFF, 8); l0.w is the per-thread element count.
"; l0 = (0.0f, 1.401298464e-45f, -1.#QNANf, 1.121038771e-44f, ) \n"
"dcl_literal l0, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000008 \n"
// l1: float constants (0, 1, -1, 0.7071067691) used to build the fixed twiddle factors.
"; l1 = (0.0f, 1.0f, -1.0f, 0.7071067691f, ) \n"
"dcl_literal l1, 0x00000000, 0x3F800000, 0xBF800000, 0x3F3504F3 \n"
// l3: (integer 3, float 8.0, 0, 0); only l3.y (the 1/N divisor) is read below.
"; l3 = (4.203895393e-45f, 8.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l3, 0x00000003, 0x41000000, 0x00000000, 0x00000000 \n"
// main: base index, then load (5) -> 8-point transform (4) -> scale (20) -> store (6).
"imul r70.x___, vAbsTidFlat0.x, l0.w \n"
"call 5 \n"
"call 4 \n"
"call 20 \n"
"call 6 \n"
"endmain \n"
// func 0: complex multiply, r40 <- r40 * r41, on both packed pairs.
//   real = x*x' - y*y'   imag = x*y' + y*x'
"func 0 \n"
"mul_ieee r100, r40, r41 \n"
"mul_ieee r101, r40, r41.yxwz \n"
"sub r40.x_z_, r100.xxzz, r100.yyww \n"
"add r40._y_w, r101.xxzz, r101.yyww \n"
"ret \n"
"endfunc \n"
// func 2: radix-2 butterfly, (r50, r51) <- (r50 + r51, r50 - r51).
// r100 keeps the original r50 so the subtraction sees the old value.
"func 2 \n"
"mov r100, r50 \n"
"add r50, r100, r51 \n"
"sub r51, r100, r51 \n"
"ret \n"
"endfunc \n"
// func 3: 4-point transform on r60..r63, in place.
// Butterflies (r60,r62) and (r61,r63), then r63 is multiplied by
// l1.xyxy = (0, 1), i.e. +i, then butterflies (r60,r61) and (r62,r63).
"func 3 \n"
"mov r50, r60 \n"
"mov r51, r62 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r62, r51 \n"
"mov r50, r61 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r61, r50 \n"
"mov r63, r51 \n"
"mov r40, r63 \n"
"mov r41, l1.xyxy \n"
"call 0 \n"
"mov r63, r40 \n"
"mov r50, r60 \n"
"mov r51, r61 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r61, r51 \n"
"mov r50, r62 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r62, r50 \n"
"mov r63, r51 \n"
"ret \n"
"endfunc \n"
// func 4: 8-point transform on r400..r407, in place.
//   1. butterflies on the pairs (r400+k, r404+k), k = 0..3
//   2. fixed multipliers on r405, r406, r407
//   3. func 3 on r400..r403 and on r404..r407
";FFT8. \n"
"func 4 \n"
"mov r50, r400 \n"
"mov r51, r404 \n"
"call 2 \n"
"mov r400, r50 \n"
"mov r404, r51 \n"
"mov r50, r401 \n"
"mov r51, r405 \n"
"call 2 \n"
"mov r401, r50 \n"
"mov r405, r51 \n"
"mov r50, r402 \n"
"mov r51, r406 \n"
"call 2 \n"
"mov r402, r50 \n"
"mov r406, r51 \n"
"mov r50, r403 \n"
"mov r51, r407 \n"
"call 2 \n"
"mov r403, r50 \n"
"mov r407, r51 \n"
// r405 *= l1.y, then *= 0.7071.
// NOTE(review): presumably the single-component swizzle l1.y broadcasts 1.0,
// giving the multiplier (1, 1) * 0.7071 -- confirm against the IL spec.
"mov r40, r405 \n"
"mov r41, l1.y \n"
"call 0 \n"
"mov r405, r40 \n"
"mul_ieee r405, r405, l1.w \n"
// r406 *= (0, 1)
"mov r40, r406 \n"
"mov r41, l1.xyxy \n"
"call 0 \n"
"mov r406, r40 \n"
// r407 *= (-1, 1), then *= 0.7071
"mov r40, r407 \n"
"mov r41, l1.zyzy \n"
"call 0 \n"
"mov r407, r40 \n"
"mul_ieee r407, r407, l1.w \n"
"mov r60, r400 \n"
"mov r61, r401 \n"
"mov r62, r402 \n"
"mov r63, r403 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r401, r61 \n"
"mov r402, r62 \n"
"mov r403, r63 \n"
"mov r60, r404 \n"
"mov r61, r405 \n"
"mov r62, r406 \n"
"mov r63, r407 \n"
"call 3 \n"
"mov r404, r60 \n"
"mov r405, r61 \n"
"mov r406, r62 \n"
"mov r407, r63 \n"
"ret \n"
"endfunc \n"
// func 5: load g[r70.x + 0..7] into r400..r407.
"func 5 \n"
"mov r400, g[r70.x+0] \n"
"mov r401, g[r70.x+1] \n"
"mov r402, g[r70.x+2] \n"
"mov r403, g[r70.x+3] \n"
"mov r404, g[r70.x+4] \n"
"mov r405, g[r70.x+5] \n"
"mov r406, g[r70.x+6] \n"
"mov r407, g[r70.x+7] \n"
"ret \n"
"endfunc \n"
// func 6: store the results to g[r70.x + 0..7].  The register order
// (400, 404, 402, 406, 401, 405, 403, 407) is the 3-bit bit-reversal of 0..7.
"func 6 \n"
"mov g[r70.x+0], r400 \n"
"mov g[r70.x+1], r404 \n"
"mov g[r70.x+2], r402 \n"
"mov g[r70.x+3], r406 \n"
"mov g[r70.x+4], r401 \n"
"mov g[r70.x+5], r405 \n"
"mov g[r70.x+6], r403 \n"
"mov g[r70.x+7], r407 \n"
"ret \n"
"endfunc \n"
// func 20: divide r400..r407 by l3.y (8.0), the 1/N scaling step.
"func 20 \n"
"div_zeroop(fltmax) r400, r400, l3.y \n"
"div_zeroop(fltmax) r401, r401, l3.y \n"
"div_zeroop(fltmax) r402, r402, l3.y \n"
"div_zeroop(fltmax) r403, r403, l3.y \n"
"div_zeroop(fltmax) r404, r404, l3.y \n"
"div_zeroop(fltmax) r405, r405, l3.y \n"
"div_zeroop(fltmax) r406, r406, l3.y \n"
"div_zeroop(fltmax) r407, r407, l3.y \n"
"ret \n"
"endfunc \n"
"end \n";

// AMD IL compute-shader source (il_cs_2_0, 64 threads per group) for a
// 16-point inverse FFT.  Each thread computes its base index as
// vAbsTidFlat0.x * 16, loads 16 values from g[], runs the 16-point network
// in func 4, divides every value by 16.0 and stores the results back to g[].
//
// The arithmetic in func 0 treats each register as two packed complex
// numbers: (.x, .y) and (.z, .w) are (real, imaginary) pairs.
//
// Register roles as used by the IL below:
//   r70.x       base index into g[] for this thread
//   r400..r415  the 16 working values
//   r40, r41    operand/result and multiplier for func 0
//   r50, r51    operands/results for func 2
//   r60..r63    operands/results for func 3
//   r100, r101  scratch
static const char* _ifft16_tomo_fft_source_ = 
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
// l0: integer constants (0, 1, 0xFFFFFFFF, 16); l0.w is the per-thread element count.
"; l0 = (0.0f, 1.401298464e-45f, -1.#QNANf, 2.242077543e-44f, ) \n"
"dcl_literal l0, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000010 \n"
// l1: float constants (0, 1, -1, 0.7071067691).
"; l1 = (0.0f, 1.0f, -1.0f, 0.7071067691f, ) \n"
"dcl_literal l1, 0x00000000, 0x3F800000, 0xBF800000, 0x3F3504F3 \n"
// l2: (0.3826834261, 0.9238795042) and their negations; swizzled below into
// the fixed multipliers used between the two radix-4 passes.
"; l2 = (0.3826834261f, 0.9238795042f, -0.3826834261f, -0.9238795042f, ) \n"
"dcl_literal l2, 0x3EC3EF15, 0x3F6C835E, 0xBEC3EF15, 0xBF6C835E \n"
// l3: (integer 4, float 16.0, 0, 0); only l3.y (the 1/N divisor) is read below.
"; l3 = (5.605193857e-45f, 16.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l3, 0x00000004, 0x41800000, 0x00000000, 0x00000000 \n"
// main: base index, then load (5) -> 16-point transform (4) -> scale (20) -> store (6).
"imul r70.x___, vAbsTidFlat0.x, l0.w \n"
"call 5 \n"
"call 4 \n"
"call 20 \n"
"call 6 \n"
"endmain \n"
// func 0: complex multiply, r40 <- r40 * r41, on both packed pairs.
//   real = x*x' - y*y'   imag = x*y' + y*x'
"func 0 \n"
"mul_ieee r100, r40, r41 \n"
"mul_ieee r101, r40, r41.yxwz \n"
"sub r40.x_z_, r100.xxzz, r100.yyww \n"
"add r40._y_w, r101.xxzz, r101.yyww \n"
"ret \n"
"endfunc \n"
// func 2: radix-2 butterfly, (r50, r51) <- (r50 + r51, r50 - r51).
"func 2 \n"
"mov r100, r50 \n"
"add r50, r100, r51 \n"
"sub r51, r100, r51 \n"
"ret \n"
"endfunc \n"
// func 3: 4-point transform on r60..r63, in place.
// Butterflies (r60,r62) and (r61,r63), then r63 is multiplied by
// l1.xyxy = (0, 1), i.e. +i, then butterflies (r60,r61) and (r62,r63).
"func 3 \n"
"mov r50, r60 \n"
"mov r51, r62 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r62, r51 \n"
"mov r50, r61 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r61, r50 \n"
"mov r63, r51 \n"
"mov r40, r63 \n"
"mov r41, l1.xyxy \n"
"call 0 \n"
"mov r63, r40 \n"
"mov r50, r60 \n"
"mov r51, r61 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r61, r51 \n"
"mov r50, r62 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r62, r50 \n"
"mov r63, r51 \n"
"ret \n"
"endfunc \n"
// func 4: 16-point transform on r400..r415, in place.
//   1. func 3 on the four stride-4 groups (r400+k, r404+k, r408+k, r412+k), k = 0..3
//   2. fixed multipliers on r405..r407, r409..r411, r413..r415
//   3. func 3 on the four contiguous groups r400..r403, r404..r407,
//      r408..r411, r412..r415
";FFT16 \n"
"func 4 \n"
"mov r60, r400 \n"
"mov r61, r404 \n"
"mov r62, r408 \n"
"mov r63, r412 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r404, r61 \n"
"mov r408, r62 \n"
"mov r412, r63 \n"
"mov r60, r401 \n"
"mov r61, r405 \n"
"mov r62, r409 \n"
"mov r63, r413 \n"
"call 3 \n"
"mov r401, r60 \n"
"mov r405, r61 \n"
"mov r409, r62 \n"
"mov r413, r63 \n"
"mov r60, r402 \n"
"mov r61, r406 \n"
"mov r62, r410 \n"
"mov r63, r414 \n"
"call 3 \n"
"mov r402, r60 \n"
"mov r406, r61 \n"
"mov r410, r62 \n"
"mov r414, r63 \n"
"mov r60, r403 \n"
"mov r61, r407 \n"
"mov r62, r411 \n"
"mov r63, r415 \n"
"call 3 \n"
"mov r403, r60 \n"
"mov r407, r61 \n"
"mov r411, r62 \n"
"mov r415, r63 \n"
// r405 *= l1.y, then *= 0.7071.
// NOTE(review): presumably the single-component swizzle l1.y broadcasts 1.0,
// giving the multiplier (1, 1) * 0.7071 -- confirm against the IL spec.
"mov r40, r405 \n"
"mov r41, l1.y \n"
"call 0 \n"
"mov r405, r40 \n"
"mul_ieee r405, r405, l1.w \n"
// r406 *= (0, 1)
"mov r40, r406 \n"
"mov r41, l1.xyxy \n"
"call 0 \n"
"mov r406, r40 \n"
// r407 *= (-1, 1), then *= 0.7071
"mov r40, r407 \n"
"mov r41, l1.zyzy \n"
"call 0 \n"
"mov r407, r40 \n"
"mul_ieee r407, r407, l1.w \n"
// r409 *= (0.9238795042, 0.3826834261)
"mov r40, r409 \n"
"mov r41, l2.yxyx \n"
"call 0 \n"
"mov r409, r40 \n"
// r410 *= l1.y, then *= 0.7071 (same multiplier as r405)
"mov r40, r410 \n"
"mov r41, l1.y \n"
"call 0 \n"
"mov r410, r40 \n"
"mul_ieee r410, r410, l1.w \n"
// r411 *= (0.3826834261, 0.9238795042)
"mov r40, r411 \n"
"mov r41, l2.xyxy \n"
"call 0 \n"
"mov r411, r40 \n"
// r413 *= (0.3826834261, 0.9238795042)
"mov r40, r413 \n"
"mov r41, l2.xyxy \n"
"call 0 \n"
"mov r413, r40 \n"
// r414 *= (-1, 1), then *= 0.7071
"mov r40, r414 \n"
"mov r41, l1.zyzy \n"
"call 0 \n"
"mov r414, r40 \n"
"mul_ieee r414, r414, l1.w \n"
// r415 *= (-0.9238795042, -0.3826834261)
"mov r40, r415 \n"
"mov r41, l2.wzwz \n"
"call 0 \n"
"mov r415, r40 \n"
"mov r60, r400 \n"
"mov r61, r401 \n"
"mov r62, r402 \n"
"mov r63, r403 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r401, r61 \n"
"mov r402, r62 \n"
"mov r403, r63 \n"
"mov r60, r404 \n"
"mov r61, r405 \n"
"mov r62, r406 \n"
"mov r63, r407 \n"
"call 3 \n"
"mov r404, r60 \n"
"mov r405, r61 \n"
"mov r406, r62 \n"
"mov r407, r63 \n"
"mov r60, r408 \n"
"mov r61, r409 \n"
"mov r62, r410 \n"
"mov r63, r411 \n"
"call 3 \n"
"mov r408, r60 \n"
"mov r409, r61 \n"
"mov r410, r62 \n"
"mov r411, r63 \n"
"mov r60, r412 \n"
"mov r61, r413 \n"
"mov r62, r414 \n"
"mov r63, r415 \n"
"call 3 \n"
"mov r412, r60 \n"
"mov r413, r61 \n"
"mov r414, r62 \n"
"mov r415, r63 \n"
"ret \n"
"endfunc \n"
// func 5: load g[r70.x + 0..15] into r400..r415.
"func 5 \n"
"mov r400, g[r70.x+0] \n"
"mov r401, g[r70.x+1] \n"
"mov r402, g[r70.x+2] \n"
"mov r403, g[r70.x+3] \n"
"mov r404, g[r70.x+4] \n"
"mov r405, g[r70.x+5] \n"
"mov r406, g[r70.x+6] \n"
"mov r407, g[r70.x+7] \n"
"mov r408, g[r70.x+8] \n"
"mov r409, g[r70.x+9] \n"
"mov r410, g[r70.x+10] \n"
"mov r411, g[r70.x+11] \n"
"mov r412, g[r70.x+12] \n"
"mov r413, g[r70.x+13] \n"
"mov r414, g[r70.x+14] \n"
"mov r415, g[r70.x+15] \n"
"ret \n"
"endfunc \n"
// func 6: store the results to g[r70.x + 0..15].  The register order
// (400, 408, 404, 412, 402, ...) is the 4-bit bit-reversal of 0..15.
"func 6 \n"
"mov g[r70.x+0], r400 \n"
"mov g[r70.x+1], r408 \n"
"mov g[r70.x+2], r404 \n"
"mov g[r70.x+3], r412 \n"
"mov g[r70.x+4], r402 \n"
"mov g[r70.x+5], r410 \n"
"mov g[r70.x+6], r406 \n"
"mov g[r70.x+7], r414 \n"
"mov g[r70.x+8], r401 \n"
"mov g[r70.x+9], r409 \n"
"mov g[r70.x+10], r405 \n"
"mov g[r70.x+11], r413 \n"
"mov g[r70.x+12], r403 \n"
"mov g[r70.x+13], r411 \n"
"mov g[r70.x+14], r407 \n"
"mov g[r70.x+15], r415 \n"
"ret \n"
"endfunc \n"
// func 20: divide r400..r415 by l3.y (16.0), the 1/N scaling step.
"func 20 \n"
"div_zeroop(fltmax) r400, r400, l3.y \n"
"div_zeroop(fltmax) r401, r401, l3.y \n"
"div_zeroop(fltmax) r402, r402, l3.y \n"
"div_zeroop(fltmax) r403, r403, l3.y \n"
"div_zeroop(fltmax) r404, r404, l3.y \n"
"div_zeroop(fltmax) r405, r405, l3.y \n"
"div_zeroop(fltmax) r406, r406, l3.y \n"
"div_zeroop(fltmax) r407, r407, l3.y \n"
"div_zeroop(fltmax) r408, r408, l3.y \n"
"div_zeroop(fltmax) r409, r409, l3.y \n"
"div_zeroop(fltmax) r410, r410, l3.y \n"
"div_zeroop(fltmax) r411, r411, l3.y \n"
"div_zeroop(fltmax) r412, r412, l3.y \n"
"div_zeroop(fltmax) r413, r413, l3.y \n"
"div_zeroop(fltmax) r414, r414, l3.y \n"
"div_zeroop(fltmax) r415, r415, l3.y \n"
"ret \n"
"endfunc \n"
"end \n";

// AMD IL compute-shader source (il_cs_2_0, 64 threads per group, 32 LDS
// units per thread, wavefront-relative LDS sharing) for a 64-point inverse
// FFT computed as two 8-point passes with an LDS exchange in between.
//
// Declared `static` like the other kernel strings in this header: a
// non-static, non-const pointer defined in a header has external linkage and
// produces duplicate-symbol errors as soon as the header is included from
// more than one translation unit.
//
// Flow of main:
//   1. base index r70.x = ((tid & ~7) << 3) + (tid & 7), tid = vAbsTidFlat0.x
//   2. func 5 loads 8 values with stride 8, func 4 transforms them
//   3. func 7 multiplies r401..r407 by (cos, sin) of angles derived from
//      (vTidInGrpFlat0.x & 7) / 64.0 scaled by l10 / l11
//   4. func 8 writes the 8 values to LDS, func 9 reads 8 values back using
//      the address in r75
//   5. func 4 transforms again, func 20 divides by 64.0, func 6 stores with
//      stride 8
//
// The arithmetic in func 0 treats each register as two packed complex
// numbers: (.x, .y) and (.z, .w) are (real, imaginary) pairs.
static const char* _ifft64_tomo_fft_source_ = 
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
"dcl_lds_size_per_thread 32 \n"
"dcl_lds_sharing_mode _wavefrontRel \n"
// l0: integer constants (0, 1, 0xFFFFFFFF, 2); l0.y is the func 9 address
// step, l0.w is a shift count.
"; l0 = (0.0f, 1.401298464e-45f, -1.#QNANf, 2.802596929e-45f, ) \n"
"dcl_literal l0, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000002 \n"
// l1: float constants (0, 1, -1, 0.7071067691).
"; l1 = (0.0f, 1.0f, -1.0f, 0.7071067691f, ) \n"
"dcl_literal l1, 0x00000000, 0x3F800000, 0xBF800000, 0x3F3504F3 \n"
"; l2 = (0.3826834261f, 0.9238795042f, -0.3826834261f, -0.9238795042f, ) \n"
"dcl_literal l2, 0x3EC3EF15, 0x3F6C835E, 0xBEC3EF15, 0xBF6C835E \n"
// l5: only l5.y (64.0) is read below, as the angle and 1/N divisor.
"; l5 = (7.006492322e-45f, 64.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l5, 0x00000005, 0x42800000, 0x00000000, 0x00000000 \n"
// l10 / l11: angle scale factors for func 7; the values are
// (0, 8, 4, 12) * pi and (2, 10, 6, 14) * pi.
"; l10 = (0.0f, 25.13274193f, 12.56637096f, 37.69911194f, ) \n"
"dcl_literal l10, 0x00000000, 0x41C90FDB, 0x41490FDB, 0x4216CBE4 \n"
"; l11 = (6.283185482f, 31.41592598f, 18.84955597f, 43.98229599f, ) \n"
"dcl_literal l11, 0x40C90FDB, 0x41FB53D1, 0x4196CBE4, 0x422FEDDF \n"
// l20..l25: integer tuples (k, 2^k, ~(2^k - 1), 2^k - 1) for k = 0..5.
// Only l23 (k = 3: shift 3, value 8, mask ~7, mask 7) is read in this kernel.
"; l20 = (0.0f, 1.401298464e-45f, -1.#QNANf, 0.0f, ) \n"
"dcl_literal l20, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000000 \n"
"; l21 = (1.401298464e-45f, 2.802596929e-45f, -1.#QNANf, 1.401298464e-45f, ) \n"
"dcl_literal l21, 0x00000001, 0x00000002, 0xFFFFFFFE, 0x00000001 \n"
"; l22 = (2.802596929e-45f, 5.605193857e-45f, -1.#QNANf, 4.203895393e-45f, ) \n"
"dcl_literal l22, 0x00000002, 0x00000004, 0xFFFFFFFC, 0x00000003 \n"
"; l23 = (4.203895393e-45f, 1.121038771e-44f, -1.#QNANf, 9.809089250e-45f, ) \n"
"dcl_literal l23, 0x00000003, 0x00000008, 0xFFFFFFF8, 0x00000007 \n"
"; l24 = (5.605193857e-45f, 2.242077543e-44f, -1.#QNANf, 2.101947696e-44f, ) \n"
"dcl_literal l24, 0x00000004, 0x00000010, 0xFFFFFFF0, 0x0000000F \n"
"; l25 = (7.006492322e-45f, 4.484155086e-44f, -1.#QNANf, 4.344025239e-44f, ) \n"
"dcl_literal l25, 0x00000005, 0x00000020, 0xFFFFFFE0, 0x0000001F \n"
// main, pass 1: r70.x = ((tid & ~7) << 3) + (tid & 7); load and transform.
"and r0.x___, vAbsTidFlat0.x, l23.z \n"
"ishl r0.x___, r0.x, l23.x \n"
"and r1.x___, vAbsTidFlat0.x, l23.w \n"
"iadd r70.x___, r0, r1 \n"
"call 5 \n"
"call 4 \n"
// r80 = float(vTidInGrpFlat0.x & 7) / 64.0, the angle fraction for func 7.
"and r0.x___, vTidInGrpFlat0.x, l23.w \n"
"itof r80.x___, r0.x \n"
"div_zeroop(fltmax) r80, r80.x, l5.y \n"
"call 7 \n"
"call 8 \n"
// LDS read address for func 9:
//   r75.x = vTidInGrpFlat0.x & ~7,  r75.y = (vTidInGrpFlat0.x & 7) << 2
"and r75.x___, vTidInGrpFlat0.x, l23.z \n"
"and r75._y__, vTidInGrpFlat0.x, l23.w \n"
"ishl r75._y__, r75.y, l0.w \n"
"call 9 \n"
// pass 2: transform, scale by 1/64, store.
"call 4 \n"
"call 20 \n"
"call 6 \n"
"endmain \n"
// func 7: twiddle multiply.  r100/r101 = r80 * l10/l11 are the angles;
// r401..r407 are each multiplied (func 0) by (cos, sin) of one angle
// component.  r400 is left unchanged.
"func 7 \n"
"mul_ieee r100, r80, l10 \n"
"mul_ieee r101, r80, l11 \n"
"cos_vec r110, r100 \n"
"cos_vec r111, r101 \n"
"sin_vec r120, r100 \n"
"sin_vec r121, r101 \n"
"mov r40, r401 \n"
"mov r41.x_z_, r110.y \n"
"mov r41._y_w, r120.y \n"
"call 0 \n"
"mov r401, r40 \n"
"mov r40, r402 \n"
"mov r41.x_z_, r110.z \n"
"mov r41._y_w, r120.z \n"
"call 0 \n"
"mov r402, r40 \n"
"mov r40, r403 \n"
"mov r41.x_z_, r110.w \n"
"mov r41._y_w, r120.w \n"
"call 0 \n"
"mov r403, r40 \n"
"mov r40, r404 \n"
"mov r41.x_z_, r111.x \n"
"mov r41._y_w, r121.x \n"
"call 0 \n"
"mov r404, r40 \n"
"mov r40, r405 \n"
"mov r41.x_z_, r111.y \n"
"mov r41._y_w, r121.y \n"
"call 0 \n"
"mov r405, r40 \n"
"mov r40, r406 \n"
"mov r41.x_z_, r111.z \n"
"mov r41._y_w, r121.z \n"
"call 0 \n"
"mov r406, r40 \n"
"mov r40, r407 \n"
"mov r41.x_z_, r111.w \n"
"mov r41._y_w, r121.w \n"
"call 0 \n"
"mov r407, r40 \n"
"ret \n"
"endfunc \n"
// func 8: write the 8 values to LDS at local offsets 0, 4, ..., 28 in the
// order r400, r404, r402, r406, r401, r405, r403, r407, then fence_lds.
"func 8 \n"
"lds_write_vec mem0, r400 \n"
"lds_write_vec_lOffset(4) mem0, r404 \n"
"lds_write_vec_lOffset(8) mem0, r402 \n"
"lds_write_vec_lOffset(12) mem0, r406 \n"
"lds_write_vec_lOffset(16) mem0, r401 \n"
"lds_write_vec_lOffset(20) mem0, r405 \n"
"lds_write_vec_lOffset(24) mem0, r403 \n"
"lds_write_vec_lOffset(28) mem0, r407 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
// func 9: read r400..r407 from LDS using r75.xyyy as the address, adding
// l0.y (1) to r75.x after every read.
"func 9 \n"
"lds_read_vec r400, r75.xyyy \n"
"iadd r75.x___, r75.x, l0.y \n"
"lds_read_vec r401, r75.xyyy \n"
"iadd r75.x___, r75.x, l0.y \n"
"lds_read_vec r402, r75.xyyy \n"
"iadd r75.x___, r75.x, l0.y \n"
"lds_read_vec r403, r75.xyyy \n"
"iadd r75.x___, r75.x, l0.y \n"
"lds_read_vec r404, r75.xyyy \n"
"iadd r75.x___, r75.x, l0.y \n"
"lds_read_vec r405, r75.xyyy \n"
"iadd r75.x___, r75.x, l0.y \n"
"lds_read_vec r406, r75.xyyy \n"
"iadd r75.x___, r75.x, l0.y \n"
"lds_read_vec r407, r75.xyyy \n"
"iadd r75.x___, r75.x, l0.y \n"
"ret \n"
"endfunc \n"
// func 0: complex multiply, r40 <- r40 * r41, on both packed pairs.
//   real = x*x' - y*y'   imag = x*y' + y*x'
"func 0 \n"
"mul_ieee r100, r40, r41 \n"
"mul_ieee r101, r40, r41.yxwz \n"
"sub r40.x_z_, r100.xxzz, r100.yyww \n"
"add r40._y_w, r101.xxzz, r101.yyww \n"
"ret \n"
"endfunc \n"
// func 2: radix-2 butterfly, (r50, r51) <- (r50 + r51, r50 - r51).
"func 2 \n"
"mov r100, r50 \n"
"add r50, r100, r51 \n"
"sub r51, r100, r51 \n"
"ret \n"
"endfunc \n"
// func 3: 4-point transform on r60..r63, in place; r63 is multiplied by
// l1.xyxy = (0, 1), i.e. +i, between the two butterfly stages.
";IFFT4. \n"
"func 3 \n"
"mov r50, r60 \n"
"mov r51, r62 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r62, r51 \n"
"mov r50, r61 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r61, r50 \n"
"mov r63, r51 \n"
"mov r40, r63 \n"
"mov r41, l1.xyxy \n"
"call 0 \n"
"mov r63, r40 \n"
"mov r50, r60 \n"
"mov r51, r61 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r61, r51 \n"
"mov r50, r62 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r62, r50 \n"
"mov r63, r51 \n"
"ret \n"
"endfunc \n"
// func 4: 8-point transform on r400..r407, in place: butterflies on
// (r400+k, r404+k), fixed multipliers on r405..r407, then func 3 on each half.
";IFFT8. \n"
"func 4 \n"
"mov r50, r400 \n"
"mov r51, r404 \n"
"call 2 \n"
"mov r400, r50 \n"
"mov r404, r51 \n"
"mov r50, r401 \n"
"mov r51, r405 \n"
"call 2 \n"
"mov r401, r50 \n"
"mov r405, r51 \n"
"mov r50, r402 \n"
"mov r51, r406 \n"
"call 2 \n"
"mov r402, r50 \n"
"mov r406, r51 \n"
"mov r50, r403 \n"
"mov r51, r407 \n"
"call 2 \n"
"mov r403, r50 \n"
"mov r407, r51 \n"
// r405 *= l1.y, then *= 0.7071.
// NOTE(review): presumably l1.y broadcasts 1.0 to all components -- confirm.
"mov r40, r405 \n"
"mov r41, l1.y \n"
"call 0 \n"
"mov r405, r40 \n"
"mul_ieee r405, r405, l1.w \n"
// r406 *= (0, 1)
"mov r40, r406 \n"
"mov r41, l1.xyxy \n"
"call 0 \n"
"mov r406, r40 \n"
// r407 *= (-1, 1), then *= 0.7071
"mov r40, r407 \n"
"mov r41, l1.zyzy \n"
"call 0 \n"
"mov r407, r40 \n"
"mul_ieee r407, r407, l1.w \n"
"mov r60, r400 \n"
"mov r61, r401 \n"
"mov r62, r402 \n"
"mov r63, r403 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r401, r61 \n"
"mov r402, r62 \n"
"mov r403, r63 \n"
"mov r60, r404 \n"
"mov r61, r405 \n"
"mov r62, r406 \n"
"mov r63, r407 \n"
"call 3 \n"
"mov r404, r60 \n"
"mov r405, r61 \n"
"mov r406, r62 \n"
"mov r407, r63 \n"
"ret \n"
"endfunc \n"
// func 5: load g[r70.x + 8*k], k = 0..7, into r400..r407.
"func 5 \n"
"mov r400, g[r70.x+0] \n"
"mov r401, g[r70.x+8] \n"
"mov r402, g[r70.x+16] \n"
"mov r403, g[r70.x+24] \n"
"mov r404, g[r70.x+32] \n"
"mov r405, g[r70.x+40] \n"
"mov r406, g[r70.x+48] \n"
"mov r407, g[r70.x+56] \n"
"ret \n"
"endfunc \n"
// func 6: store to g[r70.x + 8*k] in the register order
// 400, 404, 402, 406, 401, 405, 403, 407 (3-bit bit-reversal of 0..7).
"func 6 \n"
"mov g[r70.x+0], r400 \n"
"mov g[r70.x+8], r404 \n"
"mov g[r70.x+16], r402 \n"
"mov g[r70.x+24], r406 \n"
"mov g[r70.x+32], r401 \n"
"mov g[r70.x+40], r405 \n"
"mov g[r70.x+48], r403 \n"
"mov g[r70.x+56], r407 \n"
"ret \n"
"endfunc \n"
// func 20: divide r400..r407 by l5.y (64.0), the 1/N scaling step.
"func 20 \n"
"div_zeroop(fltmax) r400, r400, l5.y \n"
"div_zeroop(fltmax) r401, r401, l5.y \n"
"div_zeroop(fltmax) r402, r402, l5.y \n"
"div_zeroop(fltmax) r403, r403, l5.y \n"
"div_zeroop(fltmax) r404, r404, l5.y \n"
"div_zeroop(fltmax) r405, r405, l5.y \n"
"div_zeroop(fltmax) r406, r406, l5.y \n"
"div_zeroop(fltmax) r407, r407, l5.y \n"
"ret \n"
"endfunc \n"
"end \n";

// AMD IL compute-shader source (il_cs_2_0, 64 threads per group, 32 LDS
// units per thread, wavefront-relative LDS sharing) for a 512-point inverse
// FFT computed as three 8-point passes with two LDS exchanges in between.
//
// Flow of main:
//   1. base index r70.x = (vThreadGrpIdFlat0.x << 9) + vTidInGrpFlat0.x
//   2. func 5 loads 8 values with stride 64, func 4 transforms them
//   3. func 7 twiddles with angle fraction tid / 512.0, func 8 writes to LDS,
//      func 9 reads back with r75 = (tid >> 3, (tid & 7) << 2, 8)
//   4. func 4 transforms, func 7 twiddles with angle fraction
//      (tid >> 3) / 64.0, func 8 writes to LDS, func 9 reads back with
//      r75 = (tid & 7, (tid >> 3) << 2, 8)
//   5. func 4 transforms, func 20 divides by 512.0, func 6 stores with
//      stride 64
// (tid above is vTidInGrpFlat0.x.)
//
// The arithmetic in func 0 treats each register as two packed complex
// numbers: (.x, .y) and (.z, .w) are (real, imaginary) pairs.
static const char* _ifft512_tomo_fft_source_ = 
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
"dcl_lds_size_per_thread 32 \n"
"dcl_lds_sharing_mode _wavefrontRel \n"
// l0: integer constants (0, 1, 0xFFFFFFFF, 2); l0.w is used as a shift count.
"; l0 = (0.0f, 1.401298464e-45f, -1.#QNANf, 2.802596929e-45f, ) \n"
"dcl_literal l0, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000002 \n"
// l1: float constants (0, 1, -1, 0.7071067691).
"; l1 = (0.0f, 1.0f, -1.0f, 0.7071067691f, ) \n"
"dcl_literal l1, 0x00000000, 0x3F800000, 0xBF800000, 0x3F3504F3 \n"
"; l2 = (0.3826834261f, 0.9238795042f, -0.3826834261f, -0.9238795042f, ) \n"
"dcl_literal l2, 0x3EC3EF15, 0x3F6C835E, 0xBEC3EF15, 0xBF6C835E \n"
// l5: (integer 6, float 64.0); l5.y is the divisor for the second twiddle angle.
"; l5 = (8.407790786e-45f, 64.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l5, 0x00000006, 0x42800000, 0x00000000, 0x00000000 \n"
// l6: (integer 9, float 512.0); l6.x is the group shift, l6.y the
// first-pass angle divisor and the final 1/N divisor.
"; l6 = (1.261168618e-44f, 512.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l6, 0x00000009, 0x44000000, 0x00000000, 0x00000000 \n"
// l10 / l11: angle scale factors for func 7; the values are
// (0, 8, 4, 12) * pi and (2, 10, 6, 14) * pi.
"; l10 = (0.0f, 25.13274193f, 12.56637096f, 37.69911194f, ) \n"
"dcl_literal l10, 0x00000000, 0x41C90FDB, 0x41490FDB, 0x4216CBE4 \n"
"; l11 = (6.283185482f, 31.41592598f, 18.84955597f, 43.98229599f, ) \n"
"dcl_literal l11, 0x40C90FDB, 0x41FB53D1, 0x4196CBE4, 0x422FEDDF \n"
// l20..l26: integer tuples (k, 2^k, ~(2^k - 1), 2^k - 1) for k = 0..6.
// Only l23 (k = 3: shift 3, value 8, mask ~7, mask 7) is read in this kernel.
"; l20 = (0.0f, 1.401298464e-45f, -1.#QNANf, 0.0f, ) \n"
"dcl_literal l20, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000000 \n"
"; l21 = (1.401298464e-45f, 2.802596929e-45f, -1.#QNANf, 1.401298464e-45f, ) \n"
"dcl_literal l21, 0x00000001, 0x00000002, 0xFFFFFFFE, 0x00000001 \n"
"; l22 = (2.802596929e-45f, 5.605193857e-45f, -1.#QNANf, 4.203895393e-45f, ) \n"
"dcl_literal l22, 0x00000002, 0x00000004, 0xFFFFFFFC, 0x00000003 \n"
"; l23 = (4.203895393e-45f, 1.121038771e-44f, -1.#QNANf, 9.809089250e-45f, ) \n"
"dcl_literal l23, 0x00000003, 0x00000008, 0xFFFFFFF8, 0x00000007 \n"
"; l24 = (5.605193857e-45f, 2.242077543e-44f, -1.#QNANf, 2.101947696e-44f, ) \n"
"dcl_literal l24, 0x00000004, 0x00000010, 0xFFFFFFF0, 0x0000000F \n"
"; l25 = (7.006492322e-45f, 4.484155086e-44f, -1.#QNANf, 4.344025239e-44f, ) \n"
"dcl_literal l25, 0x00000005, 0x00000020, 0xFFFFFFE0, 0x0000001F \n"
"; l26 = (8.407790786e-45f, 8.968310172e-44f, -1.#QNANf, 8.828180325e-44f, ) \n"
"dcl_literal l26, 0x00000006, 0x00000040, 0xFFFFFFC0, 0x0000003F \n"
// main, pass 1: r70.x = (group id << 9) + thread-in-group id; load and transform.
"ishl r70.x___, vThreadGrpIdFlat0.x, l6.x \n"
"iadd r70.x___, r70, vTidInGrpFlat0.x \n"
"call 5 \n"
"call 4 \n"
// twiddle with angle fraction tid / 512.0, then exchange through LDS.
"itof r80.x___, vTidInGrpFlat0.x \n"
"div_zeroop(fltmax) r80, r80.x, l6.y \n"
"call 7 \n"
"call 8 \n"
// LDS read address: r75.x = tid >> 3, r75.y = (tid & 7) << 2, step r75.z = 8.
"ishr r75.x___, vTidInGrpFlat0.x, l23.x \n"
"and r75._y__, vTidInGrpFlat0.x, l23.w \n"
"ishl r75._y__, r75.y, l0.w \n"
"mov r75.__z_, l23.y \n"
"call 9 \n"
// pass 2: transform, twiddle with angle fraction (tid >> 3) / 64.0, exchange.
"call 4 \n"
"ishr r80.x___, vTidInGrpFlat0.x, l23.x \n"
"itof r80.x___, r80.x \n"
"div_zeroop(fltmax) r80, r80.x, l5.y \n"
"call 7 \n"
"call 8 \n"
// LDS read address: r75.x = tid & 7, r75.y = (tid >> 3) << 2, step r75.z = 8.
"and r75.x___, vTidInGrpFlat0.x, l23.w \n"
"ishr r75._y__, vTidInGrpFlat0.x, l23.x \n"
"ishl r75._y__, r75.y, l0.w \n"
"mov r75.__z_, l23.y \n"
"call 9 \n"
// pass 3: transform, scale by 1/512, store.
"call 4 \n"
"call 20 \n"
"call 6 \n"
"endmain \n"
// func 7: twiddle multiply.  r100/r101 = r80 * l10/l11 are the angles;
// r401..r407 are each multiplied (func 0) by (cos, sin) of one angle
// component.  r400 is left unchanged.
"func 7 \n"
"mul_ieee r100, r80, l10 \n"
"mul_ieee r101, r80, l11 \n"
"cos_vec r110, r100 \n"
"cos_vec r111, r101 \n"
"sin_vec r120, r100 \n"
"sin_vec r121, r101 \n"
"mov r40, r401 \n"
"mov r41.x_z_, r110.y \n"
"mov r41._y_w, r120.y \n"
"call 0 \n"
"mov r401, r40 \n"
"mov r40, r402 \n"
"mov r41.x_z_, r110.z \n"
"mov r41._y_w, r120.z \n"
"call 0 \n"
"mov r402, r40 \n"
"mov r40, r403 \n"
"mov r41.x_z_, r110.w \n"
"mov r41._y_w, r120.w \n"
"call 0 \n"
"mov r403, r40 \n"
"mov r40, r404 \n"
"mov r41.x_z_, r111.x \n"
"mov r41._y_w, r121.x \n"
"call 0 \n"
"mov r404, r40 \n"
"mov r40, r405 \n"
"mov r41.x_z_, r111.y \n"
"mov r41._y_w, r121.y \n"
"call 0 \n"
"mov r405, r40 \n"
"mov r40, r406 \n"
"mov r41.x_z_, r111.z \n"
"mov r41._y_w, r121.z \n"
"call 0 \n"
"mov r406, r40 \n"
"mov r40, r407 \n"
"mov r41.x_z_, r111.w \n"
"mov r41._y_w, r121.w \n"
"call 0 \n"
"mov r407, r40 \n"
"ret \n"
"endfunc \n"
// func 8: write the 8 values to LDS at local offsets 0, 4, ..., 28 in the
// order r400, r404, r402, r406, r401, r405, r403, r407, then fence_lds.
"func 8 \n"
"lds_write_vec mem0, r400 \n"
"lds_write_vec_lOffset(4) mem0, r404 \n"
"lds_write_vec_lOffset(8) mem0, r402 \n"
"lds_write_vec_lOffset(12) mem0, r406 \n"
"lds_write_vec_lOffset(16) mem0, r401 \n"
"lds_write_vec_lOffset(20) mem0, r405 \n"
"lds_write_vec_lOffset(24) mem0, r403 \n"
"lds_write_vec_lOffset(28) mem0, r407 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
// func 9: read r400..r407 from LDS using r75.xyyy as the address, adding
// r75.z (set to 8 by main) to r75.x after every read.
"func 9 \n"
"lds_read_vec r400, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r401, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r402, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r403, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r404, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r405, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r406, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r407, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"ret \n"
"endfunc \n"
// func 0: complex multiply, r40 <- r40 * r41, on both packed pairs.
//   real = x*x' - y*y'   imag = x*y' + y*x'
"func 0 \n"
"mul_ieee r100, r40, r41 \n"
"mul_ieee r101, r40, r41.yxwz \n"
"sub r40.x_z_, r100.xxzz, r100.yyww \n"
"add r40._y_w, r101.xxzz, r101.yyww \n"
"ret \n"
"endfunc \n"
// func 2: radix-2 butterfly, (r50, r51) <- (r50 + r51, r50 - r51).
"func 2 \n"
"mov r100, r50 \n"
"add r50, r100, r51 \n"
"sub r51, r100, r51 \n"
"ret \n"
"endfunc \n"
// func 3: 4-point transform on r60..r63, in place; r63 is multiplied by
// l1.xyxy = (0, 1), i.e. +i, between the two butterfly stages.
";IFFT4. \n"
"func 3 \n"
"mov r50, r60 \n"
"mov r51, r62 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r62, r51 \n"
"mov r50, r61 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r61, r50 \n"
"mov r63, r51 \n"
"mov r40, r63 \n"
"mov r41, l1.xyxy \n"
"call 0 \n"
"mov r63, r40 \n"
"mov r50, r60 \n"
"mov r51, r61 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r61, r51 \n"
"mov r50, r62 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r62, r50 \n"
"mov r63, r51 \n"
"ret \n"
"endfunc \n"
// func 4: 8-point transform on r400..r407, in place: butterflies on
// (r400+k, r404+k), fixed multipliers on r405..r407, then func 3 on each half.
";IFFT8. \n"
"func 4 \n"
"mov r50, r400 \n"
"mov r51, r404 \n"
"call 2 \n"
"mov r400, r50 \n"
"mov r404, r51 \n"
"mov r50, r401 \n"
"mov r51, r405 \n"
"call 2 \n"
"mov r401, r50 \n"
"mov r405, r51 \n"
"mov r50, r402 \n"
"mov r51, r406 \n"
"call 2 \n"
"mov r402, r50 \n"
"mov r406, r51 \n"
"mov r50, r403 \n"
"mov r51, r407 \n"
"call 2 \n"
"mov r403, r50 \n"
"mov r407, r51 \n"
// r405 *= l1.y, then *= 0.7071.
// NOTE(review): presumably l1.y broadcasts 1.0 to all components -- confirm.
"mov r40, r405 \n"
"mov r41, l1.y \n"
"call 0 \n"
"mov r405, r40 \n"
"mul_ieee r405, r405, l1.w \n"
// r406 *= (0, 1)
"mov r40, r406 \n"
"mov r41, l1.xyxy \n"
"call 0 \n"
"mov r406, r40 \n"
// r407 *= (-1, 1), then *= 0.7071
"mov r40, r407 \n"
"mov r41, l1.zyzy \n"
"call 0 \n"
"mov r407, r40 \n"
"mul_ieee r407, r407, l1.w \n"
"mov r60, r400 \n"
"mov r61, r401 \n"
"mov r62, r402 \n"
"mov r63, r403 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r401, r61 \n"
"mov r402, r62 \n"
"mov r403, r63 \n"
"mov r60, r404 \n"
"mov r61, r405 \n"
"mov r62, r406 \n"
"mov r63, r407 \n"
"call 3 \n"
"mov r404, r60 \n"
"mov r405, r61 \n"
"mov r406, r62 \n"
"mov r407, r63 \n"
"ret \n"
"endfunc \n"
// func 5: load g[r70.x + 64*k], k = 0..7, into r400..r407.
"func 5 \n"
"mov r400, g[r70.x+0] \n"
"mov r401, g[r70.x+64] \n"
"mov r402, g[r70.x+128] \n"
"mov r403, g[r70.x+192] \n"
"mov r404, g[r70.x+256] \n"
"mov r405, g[r70.x+320] \n"
"mov r406, g[r70.x+384] \n"
"mov r407, g[r70.x+448] \n"
"ret \n"
"endfunc \n"
// func 6: store to g[r70.x + 64*k] in the register order
// 400, 404, 402, 406, 401, 405, 403, 407 (3-bit bit-reversal of 0..7).
"func 6 \n"
"mov g[r70.x+0], r400 \n"
"mov g[r70.x+64], r404 \n"
"mov g[r70.x+128], r402 \n"
"mov g[r70.x+192], r406 \n"
"mov g[r70.x+256], r401 \n"
"mov g[r70.x+320], r405 \n"
"mov g[r70.x+384], r403 \n"
"mov g[r70.x+448], r407 \n"
"ret \n"
"endfunc \n"
// func 20: divide r400..r407 by l6.y (512.0), the 1/N scaling step.
"func 20 \n"
"div_zeroop(fltmax) r400, r400, l6.y \n"
"div_zeroop(fltmax) r401, r401, l6.y \n"
"div_zeroop(fltmax) r402, r402, l6.y \n"
"div_zeroop(fltmax) r403, r403, l6.y \n"
"div_zeroop(fltmax) r404, r404, l6.y \n"
"div_zeroop(fltmax) r405, r405, l6.y \n"
"div_zeroop(fltmax) r406, r406, l6.y \n"
"div_zeroop(fltmax) r407, r407, l6.y \n"
"ret \n"
"endfunc \n"
"end \n";

static const char* _ifft1024_tomo_fft_source_ = 
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
"dcl_lds_size_per_thread 32 \n"
"dcl_lds_sharing_mode _wavefrontRel \n"
"; l0 = (0.0f, 1.401298464e-45f, -1.#QNANf, 2.802596929e-45f, ) \n"
"dcl_literal l0, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000002 \n"
"; l1 = (0.0f, 1.0f, -1.0f, 0.7071067691f, ) \n"
"dcl_literal l1, 0x00000000, 0x3F800000, 0xBF800000, 0x3F3504F3 \n"
"; l2 = (0.3826834261f, 0.9238795042f, -0.3826834261f, -0.9238795042f, ) \n"
"dcl_literal l2, 0x3EC3EF15, 0x3F6C835E, 0xBEC3EF15, 0xBF6C835E \n"
"; l4 = (7.006492322e-45f, 32.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l4, 0x00000005, 0x42000000, 0x00000000, 0x00000000 \n"
"; l5 = (8.407790786e-45f, 64.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l5, 0x00000006, 0x42800000, 0x00000000, 0x00000000 \n"
"; l6 = (1.261168618e-44f, 512.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l6, 0x00000009, 0x44000000, 0x00000000, 0x00000000 \n"
"; l7 = (1.401298464e-44f, 1024.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l7, 0x0000000A, 0x44800000, 0x00000000, 0x00000000 \n"
"; l10 = (0.0f, 50.26548386f, 25.13274193f, 75.39822388f, ) \n"
"dcl_literal l10, 0x00000000, 0x42490FDB, 0x41C90FDB, 0x4296CBE4 \n"
"; l11 = (12.56637096f, 62.83185196f, 37.69911194f, 87.96459198f, ) \n"
"dcl_literal l11, 0x41490FDB, 0x427B53D1, 0x4216CBE4, 0x42AFEDDF \n"
"; l12 = (6.283185482f, 56.54866791f, 31.41592598f, 81.68141174f, ) \n"
"dcl_literal l12, 0x40C90FDB, 0x426231D6, 0x41FB53D1, 0x42A35CE2 \n"
"; l13 = (18.84955597f, 69.11503601f, 43.98229599f, 94.24777985f, ) \n"
"dcl_literal l13, 0x4196CBE4, 0x428A3AE6, 0x422FEDDF, 0x42BC7EDD \n"
"; l14 = (3.141592741f, 6.283185482f, 9.424777985f, 0.0f, ) \n"
"dcl_literal l14, 0x40490FDB, 0x40C90FDB, 0x4116CBE4, 0x00000000 \n"
"; l20 = (0.0f, 1.401298464e-45f, -1.#QNANf, 0.0f, ) \n"
"dcl_literal l20, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000000 \n"
"; l21 = (1.401298464e-45f, 2.802596929e-45f, -1.#QNANf, 1.401298464e-45f, ) \n"
"dcl_literal l21, 0x00000001, 0x00000002, 0xFFFFFFFE, 0x00000001 \n"
"; l22 = (2.802596929e-45f, 5.605193857e-45f, -1.#QNANf, 4.203895393e-45f, ) \n"
"dcl_literal l22, 0x00000002, 0x00000004, 0xFFFFFFFC, 0x00000003 \n"
"; l23 = (4.203895393e-45f, 1.121038771e-44f, -1.#QNANf, 9.809089250e-45f, ) \n"
"dcl_literal l23, 0x00000003, 0x00000008, 0xFFFFFFF8, 0x00000007 \n"
"; l24 = (5.605193857e-45f, 2.242077543e-44f, -1.#QNANf, 2.101947696e-44f, ) \n"
"dcl_literal l24, 0x00000004, 0x00000010, 0xFFFFFFF0, 0x0000000F \n"
"; l25 = (7.006492322e-45f, 4.484155086e-44f, -1.#QNANf, 4.344025239e-44f, ) \n"
"dcl_literal l25, 0x00000005, 0x00000020, 0xFFFFFFE0, 0x0000001F \n"
"; l26 = (8.407790786e-45f, 8.968310172e-44f, -1.#QNANf, 8.828180325e-44f, ) \n"
"dcl_literal l26, 0x00000006, 0x00000040, 0xFFFFFFC0, 0x0000003F \n"
"; l30 = (0.0f, 0.0f, -1.#QNANf, 2.242077543e-44f, ) \n"
"dcl_literal l30, 0x00000000, 0x00000000, 0xFFFFFFEF, 0x00000010 \n"
"ishl r70.x___, vThreadGrpIdFlat0.x, l7.x \n"
"iadd r70.x___, r70.x, vTidInGrpFlat0.x \n"
"call 5 \n"
"call 4 \n"
"itof r80.x___, vTidInGrpFlat0.x \n"
"div_zeroop(fltmax) r80, r80.x, l7.y \n"
"call 7 \n"
"call 80 \n"
"call 8 \n"
"and r75.___w, vTidInGrpFlat0.x, l24.w \n"
"ishr r75._y__, vTidInGrpFlat0.x, l24.x \n"
"ishl r75._y__, r75.y, l0.w \n"
"mov r75.__z_, l24.y \n"
"call 9 \n"
"call 101 \n"
"call 81 \n"
"ishr r75._y__, vTidInGrpFlat0.x, l24.x \n"
"ishl r75._y__, r75.y, l0.w \n"
"call 91 \n"
"call 102 \n"
"and r80.x___, vTidInGrpFlat0.x, l24.w \n"
"itof r80.x___, r80.x \n"
"div_zeroop(fltmax) r80, r80.x, l4.y \n"
"call 11 \n"
"call 1201 \n"
"call 12 \n"
"and r75.x___, vTidInGrpFlat0.x, l22.w \n"
"ishl r75.x___, r75.x, l24.x \n"
"and r0.x___, vTidInGrpFlat0.x, l22.z \n"
"and r0.x___, r0.x, l24.w \n"
"ishr r0.x___, r0.x, l0.y \n"
"ishr r75._y__, vTidInGrpFlat0.x, l25.x \n"
"iadd r75._y__, r75.y, r0.x \n"
"ishl r75._y__, r75.y, l0.w \n"
"mov r75.__z_, l0.y \n"
"and r76.x___, vTidInGrpFlat0.x, l30.w \n"
"call 13 \n"
"call 1301 \n"
"call 1202 \n"
"call 12 \n"
"and r75.x___, vTidInGrpFlat0.x, l22.w \n"
"ishl r75.x___, r75.x, l24.x \n"
"call 13 \n"
"call 1302 \n"
"call 4 \n"
"call 20 \n"
"call 6 \n"
"endmain \n"
";complex multiple \n"
"func 0 \n"
"mul_ieee r100, r40, r41 \n"
"mul_ieee r101, r40, r41.yxwz \n"
"sub r40.x_z_, r100.xxzz, r100.yyww \n"
"add r40._y_w, r101.xxzz, r101.yyww \n"
"ret \n"
"endfunc \n"
";FFT2 \n"
"func 2 \n"
"mov r100, r50 \n"
"add r50, r100, r51 \n"
"sub r51, r100, r51 \n"
"ret \n"
"endfunc \n"
";IFFT4. \n"
"func 3 \n"
"mov r50, r60 \n"
"mov r51, r62 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r62, r51 \n"
"mov r50, r61 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r61, r50 \n"
"mov r63, r51 \n"
"mov r40, r63 \n"
"mov r41, l1.xyxy \n"
"call 0 \n"
"mov r63, r40 \n"
"mov r50, r60 \n"
"mov r51, r61 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r61, r51 \n"
"mov r50, r62 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r62, r50 \n"
"mov r63, r51 \n"
"ret \n"
"endfunc \n"
";IFFT16 \n"
"func 4 \n"
"mov r60, r400 \n"
"mov r61, r404 \n"
"mov r62, r408 \n"
"mov r63, r412 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r404, r61 \n"
"mov r408, r62 \n"
"mov r412, r63 \n"
"mov r60, r401 \n"
"mov r61, r405 \n"
"mov r62, r409 \n"
"mov r63, r413 \n"
"call 3 \n"
"mov r401, r60 \n"
"mov r405, r61 \n"
"mov r409, r62 \n"
"mov r413, r63 \n"
"mov r60, r402 \n"
"mov r61, r406 \n"
"mov r62, r410 \n"
"mov r63, r414 \n"
"call 3 \n"
"mov r402, r60 \n"
"mov r406, r61 \n"
"mov r410, r62 \n"
"mov r414, r63 \n"
"mov r60, r403 \n"
"mov r61, r407 \n"
"mov r62, r411 \n"
"mov r63, r415 \n"
"call 3 \n"
"mov r403, r60 \n"
"mov r407, r61 \n"
"mov r411, r62 \n"
"mov r415, r63 \n"
"mov r40, r405 \n"
"mov r41, l1.y \n"
"call 0 \n"
"mov r405, r40 \n"
"mul_ieee r405, r405, l1.w \n"
"mov r40, r406 \n"
"mov r41, l1.xyxy \n"
"call 0 \n"
"mov r406, r40 \n"
"mov r40, r407 \n"
"mov r41, l1.zyzy \n"
"call 0 \n"
"mov r407, r40 \n"
"mul_ieee r407, r407, l1.w \n"
"mov r40, r409 \n"
"mov r41, l2.yxyx \n"
"call 0 \n"
"mov r409, r40 \n"
"mov r40, r410 \n"
"mov r41, l1.y \n"
"call 0 \n"
"mov r410, r40 \n"
"mul_ieee r410, r410, l1.w \n"
"mov r40, r411 \n"
"mov r41, l2.xyxy \n"
"call 0 \n"
"mov r411, r40 \n"
"mov r40, r413 \n"
"mov r41, l2.xyxy \n"
"call 0 \n"
"mov r413, r40 \n"
"mov r40, r414 \n"
"mov r41, l1.zyzy \n"
"call 0 \n"
"mov r414, r40 \n"
"mul_ieee r414, r414, l1.w \n"
"mov r40, r415 \n"
"mov r41, l2.wzwz \n"
"call 0 \n"
"mov r415, r40 \n"
"mov r60, r400 \n"
"mov r61, r401 \n"
"mov r62, r402 \n"
"mov r63, r403 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r401, r61 \n"
"mov r402, r62 \n"
"mov r403, r63 \n"
"mov r60, r404 \n"
"mov r61, r405 \n"
"mov r62, r406 \n"
"mov r63, r407 \n"
"call 3 \n"
"mov r404, r60 \n"
"mov r405, r61 \n"
"mov r406, r62 \n"
"mov r407, r63 \n"
"mov r60, r408 \n"
"mov r61, r409 \n"
"mov r62, r410 \n"
"mov r63, r411 \n"
"call 3 \n"
"mov r408, r60 \n"
"mov r409, r61 \n"
"mov r410, r62 \n"
"mov r411, r63 \n"
"mov r60, r412 \n"
"mov r61, r413 \n"
"mov r62, r414 \n"
"mov r63, r415 \n"
"call 3 \n"
"mov r412, r60 \n"
"mov r413, r61 \n"
"mov r414, r62 \n"
"mov r415, r63 \n"
"ret \n"
"endfunc \n"
"func 5 \n"
"mov r400, g[r70.x+0] \n"
"mov r401, g[r70.x+64] \n"
"mov r402, g[r70.x+128] \n"
"mov r403, g[r70.x+192] \n"
"mov r404, g[r70.x+256] \n"
"mov r405, g[r70.x+320] \n"
"mov r406, g[r70.x+384] \n"
"mov r407, g[r70.x+448] \n"
"mov r408, g[r70.x+512] \n"
"mov r409, g[r70.x+576] \n"
"mov r410, g[r70.x+640] \n"
"mov r411, g[r70.x+704] \n"
"mov r412, g[r70.x+768] \n"
"mov r413, g[r70.x+832] \n"
"mov r414, g[r70.x+896] \n"
"mov r415, g[r70.x+960] \n"
"ret \n"
"endfunc \n"
"func 6 \n"
"mov g[r70.x+0], r400 \n"
"mov g[r70.x+64], r408 \n"
"mov g[r70.x+128], r404 \n"
"mov g[r70.x+192], r412 \n"
"mov g[r70.x+256], r402 \n"
"mov g[r70.x+320], r410 \n"
"mov g[r70.x+384], r406 \n"
"mov g[r70.x+448], r414 \n"
"mov g[r70.x+512], r401 \n"
"mov g[r70.x+576], r409 \n"
"mov g[r70.x+640], r405 \n"
"mov g[r70.x+704], r413 \n"
"mov g[r70.x+768], r403 \n"
"mov g[r70.x+832], r411 \n"
"mov g[r70.x+896], r407 \n"
"mov g[r70.x+960], r415 \n"
"ret \n"
"endfunc \n"
// IL func 7: twiddle stage for the sixteen working registers.
// Forms 16 angles as r80.x * {l10, l11, l12, l13}, takes their cosine and
// sine, and multiplies r401..r415 by the matching (cos, sin) pair via func 0.
// r400 is not touched: the .x lanes of r110/r120 are never written or read.
// NOTE(review): l10..l13, r80.x and func 0 are defined outside this excerpt;
// func 0 is presumably the packed complex multiply r40 *= r41 used by the
// other kernels in this file - confirm.
"func 7 \n"
// Angles: one per lane of r100..r103.
"mul_ieee r100, r80.x, l10 \n"
"mul_ieee r101, r80.x, l11 \n"
"mul_ieee r102, r80.x, l12 \n"
"mul_ieee r103, r80.x, l13 \n"
// Cosines go to r110..r113, sines to r120..r123 (lane-for-lane).
"cos_vec r110._yzw, r100 \n"
"cos_vec r111, r101 \n"
"cos_vec r112, r102 \n"
"cos_vec r113, r103 \n"
"sin_vec r120._yzw, r100 \n"
"sin_vec r121, r101 \n"
"sin_vec r122, r102 \n"
"sin_vec r123, r103 \n"
// Each group below: r40 = data, r41 = (cos, sin, cos, sin), call func 0,
// write r40 back. r401..r403 use lanes y, z, w of r110/r120.
"mov r40, r401 \n"
"mov r41.x_z_, r110.y \n"
"mov r41._y_w, r120.y \n"
"call 0 \n"
"mov r401, r40 \n"
"mov r40, r402 \n"
"mov r41.x_z_, r110.z \n"
"mov r41._y_w, r120.z \n"
"call 0 \n"
"mov r402, r40 \n"
"mov r40, r403 \n"
"mov r41.x_z_, r110.w \n"
"mov r41._y_w, r120.w \n"
"call 0 \n"
"mov r403, r40 \n"
// r404..r407 use lanes x, y, z, w of r111/r121.
"mov r40, r404 \n"
"mov r41.x_z_, r111.x \n"
"mov r41._y_w, r121.x \n"
"call 0 \n"
"mov r404, r40 \n"
"mov r40, r405 \n"
"mov r41.x_z_, r111.y \n"
"mov r41._y_w, r121.y \n"
"call 0 \n"
"mov r405, r40 \n"
"mov r40, r406 \n"
"mov r41.x_z_, r111.z \n"
"mov r41._y_w, r121.z \n"
"call 0 \n"
"mov r406, r40 \n"
"mov r40, r407 \n"
"mov r41.x_z_, r111.w \n"
"mov r41._y_w, r121.w \n"
"call 0 \n"
"mov r407, r40 \n"
// r408..r411 use lanes x, y, z, w of r112/r122.
"mov r40, r408 \n"
"mov r41.x_z_, r112.x \n"
"mov r41._y_w, r122.x \n"
"call 0 \n"
"mov r408, r40 \n"
"mov r40, r409 \n"
"mov r41.x_z_, r112.y \n"
"mov r41._y_w, r122.y \n"
"call 0 \n"
"mov r409, r40 \n"
"mov r40, r410 \n"
"mov r41.x_z_, r112.z \n"
"mov r41._y_w, r122.z \n"
"call 0 \n"
"mov r410, r40 \n"
"mov r40, r411 \n"
"mov r41.x_z_, r112.w \n"
"mov r41._y_w, r122.w \n"
"call 0 \n"
"mov r411, r40 \n"
// r412..r415 use lanes x, y, z, w of r113/r123.
"mov r40, r412 \n"
"mov r41.x_z_, r113.x \n"
"mov r41._y_w, r123.x \n"
"call 0 \n"
"mov r412, r40 \n"
"mov r40, r413 \n"
"mov r41.x_z_, r113.y \n"
"mov r41._y_w, r123.y \n"
"call 0 \n"
"mov r413, r40 \n"
"mov r40, r414 \n"
"mov r41.x_z_, r113.z \n"
"mov r41._y_w, r123.z \n"
"call 0 \n"
"mov r414, r40 \n"
"mov r40, r415 \n"
"mov r41.x_z_, r113.w \n"
"mov r41._y_w, r123.w \n"
"call 0 \n"
"mov r415, r40 \n"
"ret \n"
"endfunc \n"
// IL func 80: copies the working registers r400..r415 one-for-one into the
// staging registers r500..r515 (r5NN = r4NN); r400..r415 are left unchanged.
"func 80 \n"
"mov r500, r400 \n"
"mov r501, r401 \n"
"mov r502, r402 \n"
"mov r503, r403 \n"
"mov r504, r404 \n"
"mov r505, r405 \n"
"mov r506, r406 \n"
"mov r507, r407 \n"
"mov r508, r408 \n"
"mov r509, r409 \n"
"mov r510, r410 \n"
"mov r511, r411 \n"
"mov r512, r412 \n"
"mov r513, r413 \n"
"mov r514, r414 \n"
"mov r515, r415 \n"
"ret \n"
"endfunc \n"
// IL func 8: writes eight staging registers to LDS (mem0) at lOffset
// 0, 4, ..., 28 and then issues fence_lds.
// The register order r500, r508, r504, r512, r502, r510, r506, r514 is the
// 4-bit bit-reversal of slot numbers 0..7 (slot i receives r5<bitrev4(i)>).
// NOTE(review): the unit of lOffset is not visible here - confirm against
// the IL specification before changing the offsets.
"func 8 \n"
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r508 \n"
"lds_write_vec_lOffset(8) mem0, r504 \n"
"lds_write_vec_lOffset(12) mem0, r512 \n"
"lds_write_vec_lOffset(16) mem0, r502 \n"
"lds_write_vec_lOffset(20) mem0, r510 \n"
"lds_write_vec_lOffset(24) mem0, r506 \n"
"lds_write_vec_lOffset(28) mem0, r514 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
// IL func 81: companion of func 8 for the other eight staging registers.
// Writes r501, r509, r505, r513, r503, r511, r507, r515 to LDS (mem0) at
// lOffset 0, 4, ..., 28 and then issues fence_lds. The register order is the
// 4-bit bit-reversal of 8..15 (slot i receives r5<bitrev4(8 + i)>).
// It uses the same lOffsets as func 8, so it overwrites whatever func 8
// stored; callers presumably read the LDS back between the two calls.
"func 81 \n"
"lds_write_vec mem0, r501 \n"
"lds_write_vec_lOffset(4) mem0, r509 \n"
"lds_write_vec_lOffset(8) mem0, r505 \n"
"lds_write_vec_lOffset(12) mem0, r513 \n"
"lds_write_vec_lOffset(16) mem0, r503 \n"
"lds_write_vec_lOffset(20) mem0, r511 \n"
"lds_write_vec_lOffset(24) mem0, r507 \n"
"lds_write_vec_lOffset(28) mem0, r515 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
// IL func 9: reads r400..r407 back from LDS in two passes of four reads.
// Inputs (set by the caller, outside this excerpt): r75.w = starting x,
// r75.z = step, r75.y = starting y. Each pass resets x to r75.w, reads at
// x, x+z, x+2z, x+3z with y fixed, then advances y by r75.z.
// On return r75.y has been advanced by 2 * r75.z and r75.x = r75.w + 3 * r75.z.
// NOTE(review): presumably x selects the owning thread's LDS region and y the
// offset inside it (lds_read_vec addressing) - confirm in the IL spec.
"func 9 \n"
// Pass 1: r400..r403.
"mov r75.x___, r75.w \n"
"lds_read_vec r400, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r401, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r402, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r403, r75.xyyy \n"
"iadd r75._y__, r75.y, r75.z \n"
// Pass 2: r404..r407, same x sequence, next y.
"mov r75.x___, r75.w \n"
"lds_read_vec r404, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r405, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r406, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r407, r75.xyyy \n"
"iadd r75._y__, r75.y, r75.z \n"
"ret \n"
"endfunc \n"
// IL func 91: same read pattern as func 9, but fills r408..r415.
// Two passes of four LDS reads: x restarts at r75.w and steps by r75.z within
// a pass; y (r75.y) is advanced by r75.z after each pass. Because r75.y is
// carried in r75, calling this right after func 9 continues from the y value
// func 9 left behind.
"func 91 \n"
// Pass 1: r408..r411.
"mov r75.x___, r75.w \n"
"lds_read_vec r408, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r409, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r410, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r411, r75.xyyy \n"
"iadd r75._y__, r75.y, r75.z \n"
// Pass 2: r412..r415.
"mov r75.x___, r75.w \n"
"lds_read_vec r412, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r413, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r414, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r415, r75.xyyy \n"
"iadd r75._y__, r75.y, r75.z \n"
"ret \n"
"endfunc \n"
// IL func 101: applies func 3 to two register quads, r400..r403 and then
// r404..r407. Each quad is copied into r60..r63 (func 3's in/out registers),
// func 3 is called, and r60..r63 are copied back to the same four registers.
// NOTE(review): func 3 of this kernel is outside this excerpt; in the other
// kernels of this file func 3 is the 4-point butterfly - confirm.
"func 101 \n"
"mov r60, r400 \n"
"mov r61, r401 \n"
"mov r62, r402 \n"
"mov r63, r403 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r401, r61 \n"
"mov r402, r62 \n"
"mov r403, r63 \n"
"mov r60, r404 \n"
"mov r61, r405 \n"
"mov r62, r406 \n"
"mov r63, r407 \n"
"call 3 \n"
"mov r404, r60 \n"
"mov r405, r61 \n"
"mov r406, r62 \n"
"mov r407, r63 \n"
"ret \n"
"endfunc \n"
// IL func 102: applies func 3 to the register quads r408..r411 and
// r412..r415, staging each quad through r60..r63 and writing the results
// back in place. Mirrors func 101, which handles r400..r407.
"func 102 \n"
"mov r60, r408 \n"
"mov r61, r409 \n"
"mov r62, r410 \n"
"mov r63, r411 \n"
"call 3 \n"
"mov r408, r60 \n"
"mov r409, r61 \n"
"mov r410, r62 \n"
"mov r411, r63 \n"
"mov r60, r412 \n"
"mov r61, r413 \n"
"mov r62, r414 \n"
"mov r63, r415 \n"
"call 3 \n"
"mov r412, r60 \n"
"mov r413, r61 \n"
"mov r414, r62 \n"
"mov r415, r63 \n"
"ret \n"
"endfunc \n"
// IL func 11: second twiddle stage. Builds three angles r80.xyz * l14.xyz,
// takes their cosine/sine, and multiplies twelve registers by the matching
// (cos, sin) pair through func 0:
//   r401, r405, r409, r413  use lane y
//   r402, r406, r410, r414  use lane x
//   r403, r407, r411, r415  use lane z
// r400, r404, r408 and r412 are left untouched.
// NOTE(review): l14, r80 and func 0 are defined outside this excerpt; func 0
// is presumably the packed complex multiply r40 *= r41 - confirm.
"func 11 \n"
"mul_ieee r100.xyz_, r80, l14 \n"
"cos_vec r110.xyz_, r100 \n"
"sin_vec r120.xyz_, r100 \n"
// Lane y: registers r401, r405, r409, r413.
"mov r40, r401 \n"
"mov r41.x_z_, r110.y \n"
"mov r41._y_w, r120.y \n"
"call 0 \n"
"mov r401, r40 \n"
"mov r40, r405 \n"
"mov r41.x_z_, r110.y \n"
"mov r41._y_w, r120.y \n"
"call 0 \n"
"mov r405, r40 \n"
"mov r40, r409 \n"
"mov r41.x_z_, r110.y \n"
"mov r41._y_w, r120.y \n"
"call 0 \n"
"mov r409, r40 \n"
"mov r40, r413 \n"
"mov r41.x_z_, r110.y \n"
"mov r41._y_w, r120.y \n"
"call 0 \n"
"mov r413, r40 \n"
// Lane x: registers r402, r406, r410, r414.
"mov r40, r402 \n"
"mov r41.x_z_, r110.x \n"
"mov r41._y_w, r120.x \n"
"call 0 \n"
"mov r402, r40 \n"
"mov r40, r406 \n"
"mov r41.x_z_, r110.x \n"
"mov r41._y_w, r120.x \n"
"call 0 \n"
"mov r406, r40 \n"
"mov r40, r410 \n"
"mov r41.x_z_, r110.x \n"
"mov r41._y_w, r120.x \n"
"call 0 \n"
"mov r410, r40 \n"
"mov r40, r414 \n"
"mov r41.x_z_, r110.x \n"
"mov r41._y_w, r120.x \n"
"call 0 \n"
"mov r414, r40 \n"
// Lane z: registers r403, r407, r411, r415.
"mov r40, r403 \n"
"mov r41.x_z_, r110.z \n"
"mov r41._y_w, r120.z \n"
"call 0 \n"
"mov r403, r40 \n"
"mov r40, r407 \n"
"mov r41.x_z_, r110.z \n"
"mov r41._y_w, r120.z \n"
"call 0 \n"
"mov r407, r40 \n"
"mov r40, r411 \n"
"mov r41.x_z_, r110.z \n"
"mov r41._y_w, r120.z \n"
"call 0 \n"
"mov r411, r40 \n"
"mov r40, r415 \n"
"mov r41.x_z_, r110.z \n"
"mov r41._y_w, r120.z \n"
"call 0 \n"
"mov r415, r40 \n"
"ret \n"
"endfunc \n"
// IL func 1201: packs the .xy halves of r400..r415 into r500..r507.
// Each destination takes .xy from one source and puts another source's .xy
// into its .zw:
//   r500 = (r400.xy, r402.xy)   r501 = (r401.xy, r403.xy)
//   r502 = (r404.xy, r406.xy)   r503 = (r405.xy, r407.xy)
//   r504 = (r408.xy, r410.xy)   r505 = (r409.xy, r411.xy)
//   r506 = (r412.xy, r414.xy)   r507 = (r413.xy, r415.xy)
// The "0" swizzle entries only fill lanes that the write mask discards.
"func 1201 \n"
"mov r500.xy__, r400.xyyy \n"
"mov r500.__zw, r402.00xy \n"
"mov r501.xy__, r401.xyyy \n"
"mov r501.__zw, r403.00xy \n"
"mov r502.xy__, r404.xyyy \n"
"mov r502.__zw, r406.00xy \n"
"mov r503.xy__, r405.xyyy \n"
"mov r503.__zw, r407.00xy \n"
"mov r504.xy__, r408.xyyy \n"
"mov r504.__zw, r410.00xy \n"
"mov r505.xy__, r409.xyyy \n"
"mov r505.__zw, r411.00xy \n"
"mov r506.xy__, r412.xyyy \n"
"mov r506.__zw, r414.00xy \n"
"mov r507.xy__, r413.xyyy \n"
"mov r507.__zw, r415.00xy \n"
"ret \n"
"endfunc \n"
// IL func 1202: packs the .zw halves of r400..r415 into r500..r507, using
// the same register pairing as func 1201:
//   r500 = (r400.zw, r402.zw)   r501 = (r401.zw, r403.zw)
//   r502 = (r404.zw, r406.zw)   r503 = (r405.zw, r407.zw)
//   r504 = (r408.zw, r410.zw)   r505 = (r409.zw, r411.zw)
//   r506 = (r412.zw, r414.zw)   r507 = (r413.zw, r415.zw)
// The "0" swizzle entries only fill lanes that the write mask discards.
"func 1202 \n"
"mov r500.xy__, r400.zwww \n"
"mov r500.__zw, r402.00zw \n"
"mov r501.xy__, r401.zwww \n"
"mov r501.__zw, r403.00zw \n"
"mov r502.xy__, r404.zwww \n"
"mov r502.__zw, r406.00zw \n"
"mov r503.xy__, r405.zwww \n"
"mov r503.__zw, r407.00zw \n"
"mov r504.xy__, r408.zwww \n"
"mov r504.__zw, r410.00zw \n"
"mov r505.xy__, r409.zwww \n"
"mov r505.__zw, r411.00zw \n"
"mov r506.xy__, r412.zwww \n"
"mov r506.__zw, r414.00zw \n"
"mov r507.xy__, r413.zwww \n"
"mov r507.__zw, r415.00zw \n"
"ret \n"
"endfunc \n"
// IL func 12: writes the packed staging registers r500..r507, in order, to
// LDS (mem0) at lOffset 0, 4, ..., 28 and then issues fence_lds.
// Unlike func 8 / func 81 there is no reordering: slot i receives r50<i>.
"func 12 \n"
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r501 \n"
"lds_write_vec_lOffset(8) mem0, r502 \n"
"lds_write_vec_lOffset(12) mem0, r503 \n"
"lds_write_vec_lOffset(16) mem0, r504 \n"
"lds_write_vec_lOffset(20) mem0, r505 \n"
"lds_write_vec_lOffset(24) mem0, r506 \n"
"lds_write_vec_lOffset(28) mem0, r507 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
// IL func 13: sixteen consecutive LDS reads into r500..r515.
// The address is r75.xy; r75.x is advanced by r75.z after every read while
// r75.y stays fixed. The caller must set r75.x, r75.y and r75.z beforehand
// (unlike func 9 / func 91, x is NOT reloaded from r75.w here).
// On return r75.x has been advanced by 16 * r75.z.
"func 13 \n"
"lds_read_vec r500, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r501, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r502, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r503, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r504, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r505, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r506, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r507, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r508, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r509, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r510, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r511, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r512, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r513, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r514, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"lds_read_vec r515, r75.xyyy \n"
"iadd r75.x___, r75.x, r75.z \n"
"ret \n"
"endfunc \n"
// IL func 1301: fills the .xy half of r400..r415 from r500..r515.
// For every N in 00..15: r4N.xy = r76.x ? r5N.zw : r5N.xy
// (cmov_logical picks the second operand where the condition lane is
// non-zero, otherwise the third). The .zw half of r4N is not written.
// NOTE(review): r76.x is set outside this excerpt; it presumably flags which
// half of the packed LDS value belongs to this thread - confirm.
"func 1301 \n"
"cmov_logical r400.xy__, r76.xx00, r500.zwww, r500.xyyy \n"
"cmov_logical r401.xy__, r76.xx00, r501.zwww, r501.xyyy \n"
"cmov_logical r402.xy__, r76.xx00, r502.zwww, r502.xyyy \n"
"cmov_logical r403.xy__, r76.xx00, r503.zwww, r503.xyyy \n"
"cmov_logical r404.xy__, r76.xx00, r504.zwww, r504.xyyy \n"
"cmov_logical r405.xy__, r76.xx00, r505.zwww, r505.xyyy \n"
"cmov_logical r406.xy__, r76.xx00, r506.zwww, r506.xyyy \n"
"cmov_logical r407.xy__, r76.xx00, r507.zwww, r507.xyyy \n"
"cmov_logical r408.xy__, r76.xx00, r508.zwww, r508.xyyy \n"
"cmov_logical r409.xy__, r76.xx00, r509.zwww, r509.xyyy \n"
"cmov_logical r410.xy__, r76.xx00, r510.zwww, r510.xyyy \n"
"cmov_logical r411.xy__, r76.xx00, r511.zwww, r511.xyyy \n"
"cmov_logical r412.xy__, r76.xx00, r512.zwww, r512.xyyy \n"
"cmov_logical r413.xy__, r76.xx00, r513.zwww, r513.xyyy \n"
"cmov_logical r414.xy__, r76.xx00, r514.zwww, r514.xyyy \n"
"cmov_logical r415.xy__, r76.xx00, r515.zwww, r515.xyyy \n"
"ret \n"
"endfunc \n"
// IL func 1302: fills the .zw half of r400..r415 from r500..r515.
// For every N in 00..15: r4N.zw = r76.x ? r5N.zw : r5N.xy
// (same selection as func 1301, but written to the upper two lanes).
// The .xy half of r4N is not written.
"func 1302 \n"
"cmov_logical r400.__zw, r76.00xx, r500.00zw, r500.00xy \n"
"cmov_logical r401.__zw, r76.00xx, r501.00zw, r501.00xy \n"
"cmov_logical r402.__zw, r76.00xx, r502.00zw, r502.00xy \n"
"cmov_logical r403.__zw, r76.00xx, r503.00zw, r503.00xy \n"
"cmov_logical r404.__zw, r76.00xx, r504.00zw, r504.00xy \n"
"cmov_logical r405.__zw, r76.00xx, r505.00zw, r505.00xy \n"
"cmov_logical r406.__zw, r76.00xx, r506.00zw, r506.00xy \n"
"cmov_logical r407.__zw, r76.00xx, r507.00zw, r507.00xy \n"
"cmov_logical r408.__zw, r76.00xx, r508.00zw, r508.00xy \n"
"cmov_logical r409.__zw, r76.00xx, r509.00zw, r509.00xy \n"
"cmov_logical r410.__zw, r76.00xx, r510.00zw, r510.00xy \n"
"cmov_logical r411.__zw, r76.00xx, r511.00zw, r511.00xy \n"
"cmov_logical r412.__zw, r76.00xx, r512.00zw, r512.00xy \n"
"cmov_logical r413.__zw, r76.00xx, r513.00zw, r513.00xy \n"
"cmov_logical r414.__zw, r76.00xx, r514.00zw, r514.00xy \n"
"cmov_logical r415.__zw, r76.00xx, r515.00zw, r515.00xy \n"
"ret \n"
"endfunc \n"
// IL func 20: divides every lane of r400..r415 by the scalar l7.y, in place.
// The zeroop(fltmax) modifier selects FLT_MAX as the result of a division
// by zero.
// NOTE(review): l7 is declared outside this excerpt; this is presumably the
// 1/N normalisation of the inverse transform - confirm the literal's value.
"func 20 \n"
"div_zeroop(fltmax) r400, r400, l7.y \n"
"div_zeroop(fltmax) r401, r401, l7.y \n"
"div_zeroop(fltmax) r402, r402, l7.y \n"
"div_zeroop(fltmax) r403, r403, l7.y \n"
"div_zeroop(fltmax) r404, r404, l7.y \n"
"div_zeroop(fltmax) r405, r405, l7.y \n"
"div_zeroop(fltmax) r406, r406, l7.y \n"
"div_zeroop(fltmax) r407, r407, l7.y \n"
"div_zeroop(fltmax) r408, r408, l7.y \n"
"div_zeroop(fltmax) r409, r409, l7.y \n"
"div_zeroop(fltmax) r410, r410, l7.y \n"
"div_zeroop(fltmax) r411, r411, l7.y \n"
"div_zeroop(fltmax) r412, r412, l7.y \n"
"div_zeroop(fltmax) r413, r413, l7.y \n"
"div_zeroop(fltmax) r414, r414, l7.y \n"
"div_zeroop(fltmax) r415, r415, l7.y \n"
"ret \n"
"endfunc \n"
"end \n";

// AMD IL compute kernel (il_cs_2_0, 64 threads per group): one radix-4
// inverse-FFT pass over the global buffer g[], butterfly first, twiddle
// second.
//
// Per thread:
//   t    = ((groupId & 7) << 6) + tidInGroup          (0..511)
//   base = ((groupId & ~7) << 8) + t                  (2048 entries per 8 groups)
//   load g[base + {0, 512, 1024, 1536}] into r400..r403      (func 5)
//   4-point inverse butterfly on r400..r403                   (func 3)
//   multiply r401..r403 by (cos a, sin a), a = l10.{y,z,w} * t / 2048 (func 7)
//   divide r400..r403 by 4                                    (func 20)
//   store to g[base + {0, 512, 1024, 1536}]                   (func 6)
//
// func 3 leaves its outputs in the order 0, 2, 1, 3. That is why l10 holds
// (0, 4*pi, 2*pi, 6*pi) and why func 6 swaps r401 and r402 when storing.
// Only the layout is visible here; any change must keep the emitted text valid IL.
const char _ifft2048_fft4_tomo_source_[] = 
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
// Literal pool. l3..l9 hold (log2(v) as integer, v as float); l20..l26 hold
// (n, 1 << n, ~((1 << n) - 1), (1 << n) - 1) for n = 0..6. Only l1, l3, l8,
// l9, l10, l23 and l26 are referenced by the code below.
"; l0 = (0.0f, 1.401298464e-45f, -1.#QNANf, 2.802596929e-45f, ) \n"
"dcl_literal l0, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000002 \n"
"; l1 = (0.0f, 1.0f, -1.0f, 0.7071067691f, ) \n"
"dcl_literal l1, 0x00000000, 0x3F800000, 0xBF800000, 0x3F3504F3 \n"
"; l3 = (2.802596929e-45f, 4.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l3, 0x00000002, 0x40800000, 0x00000000, 0x00000000 \n"
"; l4 = (7.006492322e-45f, 32.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l4, 0x00000005, 0x42000000, 0x00000000, 0x00000000 \n"
"; l5 = (8.407790786e-45f, 64.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l5, 0x00000006, 0x42800000, 0x00000000, 0x00000000 \n"
"; l6 = (1.261168618e-44f, 512.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l6, 0x00000009, 0x44000000, 0x00000000, 0x00000000 \n"
"; l7 = (1.401298464e-44f, 1024.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l7, 0x0000000A, 0x44800000, 0x00000000, 0x00000000 \n"
"; l8 = (1.541428311e-44f, 2048.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l8, 0x0000000B, 0x45000000, 0x00000000, 0x00000000 \n"
"; l9 = (1.121038771e-44f, 256.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l9, 0x00000008, 0x43800000, 0x00000000, 0x00000000 \n"
// l10 = (0, 4*pi, 2*pi, 6*pi): twiddle angle factors in the 0, 2, 1, 3 order
// produced by func 3.
"; l10 = (0.0f, 12.56637096f, 6.283185482f, 18.84955597f, ) \n"
"dcl_literal l10, 0x00000000, 0x41490FDB, 0x40C90FDB, 0x4196CBE4 \n"
"; l20 = (0.0f, 1.401298464e-45f, -1.#QNANf, 0.0f, ) \n"
"dcl_literal l20, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000000 \n"
"; l21 = (1.401298464e-45f, 2.802596929e-45f, -1.#QNANf, 1.401298464e-45f, ) \n"
"dcl_literal l21, 0x00000001, 0x00000002, 0xFFFFFFFE, 0x00000001 \n"
"; l22 = (2.802596929e-45f, 5.605193857e-45f, -1.#QNANf, 4.203895393e-45f, ) \n"
"dcl_literal l22, 0x00000002, 0x00000004, 0xFFFFFFFC, 0x00000003 \n"
"; l23 = (4.203895393e-45f, 1.121038771e-44f, -1.#QNANf, 9.809089250e-45f, ) \n"
"dcl_literal l23, 0x00000003, 0x00000008, 0xFFFFFFF8, 0x00000007 \n"
"; l24 = (5.605193857e-45f, 2.242077543e-44f, -1.#QNANf, 2.101947696e-44f, ) \n"
"dcl_literal l24, 0x00000004, 0x00000010, 0xFFFFFFF0, 0x0000000F \n"
"; l25 = (7.006492322e-45f, 4.484155086e-44f, -1.#QNANf, 4.344025239e-44f, ) \n"
"dcl_literal l25, 0x00000005, 0x00000020, 0xFFFFFFE0, 0x0000001F \n"
"; l26 = (8.407790786e-45f, 8.968310172e-44f, -1.#QNANf, 8.828180325e-44f, ) \n"
"dcl_literal l26, 0x00000006, 0x00000040, 0xFFFFFFC0, 0x0000003F \n"
// main: r0.x = t = ((groupId & 7) << 6) + tidInGroup
"and r0.x___, vThreadGrpIdFlat0.x, l23.w \n"
"ishl r0.x___, r0.x, l26.x \n"
"iadd r0.x___, r0.x, vTidInGrpFlat0.x \n"
// r0.y = (groupId & ~7) << 8; r70.x = r0.y + t is the base index into g[].
"and r0._y__, vThreadGrpIdFlat0.x, l23.z \n"
"ishl r0._y__, r0.y, l9.x \n"
"iadd r70.x___, r0.y, r0.x \n"
// Load, then run the 4-point butterfly through func 3's r60..r63 interface.
"call 5 \n"
"mov r60, r400 \n"
"mov r61, r401 \n"
"mov r62, r402 \n"
"mov r63, r403 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r401, r61 \n"
"mov r402, r62 \n"
"mov r403, r63 \n"
// r80 = t / 2048 broadcast to all lanes, then twiddle, scale and store.
"itof r80.x___, r0.x \n"
"div_zeroop(fltmax) r80, r80.x, l8.y \n"
"call 7 \n"
"call 20 \n"
"call 6 \n"
"endmain \n"
// func 7: multiply r401, r402, r403 by (cos, sin) of r80 * l10 lanes y, z, w.
// r400 is not modified (lane x of r110/r120 is never written).
"func 7 \n"
"mul_ieee r105, r80, l10 \n"
"cos_vec r110._yzw, r105 \n"
"sin_vec r120._yzw, r105 \n"
"mov r40, r401 \n"
"mov r41.x_z_, r110.y \n"
"mov r41._y_w, r120.y \n"
"call 0 \n"
"mov r401, r40 \n"
"mov r40, r402 \n"
"mov r41.x_z_, r110.z \n"
"mov r41._y_w, r120.z \n"
"call 0 \n"
"mov r402, r40 \n"
"mov r40, r403 \n"
"mov r41.x_z_, r110.w \n"
"mov r41._y_w, r120.w \n"
"call 0 \n"
"mov r403, r40 \n"
"ret \n"
"endfunc \n"
// func 5: load four values, 512 entries apart, starting at g[r70.x].
"func 5 \n"
"mov r400, g[r70.x+0] \n"
"mov r401, g[r70.x+512] \n"
"mov r402, g[r70.x+1024] \n"
"mov r403, g[r70.x+1536] \n"
"ret \n"
"endfunc \n"
// func 6: store the results; r401 and r402 are swapped to undo the 0, 2, 1, 3
// output order of func 3.
"func 6 \n"
"mov g[r70.x+0], r400 \n"
"mov g[r70.x+512], r402 \n"
"mov g[r70.x+1024], r401 \n"
"mov g[r70.x+1536], r403 \n"
"ret \n"
"endfunc \n"
// func 0: packed complex multiply r40 *= r41. Lanes (x, y) and (z, w) each
// hold one (re, im) pair: re = a*c - b*d, im = a*d + b*c. Clobbers r100, r101.
"func 0 \n"
"mul_ieee r100, r40, r41 \n"
"mul_ieee r101, r40, r41.yxwz \n"
"sub r40.x_z_, r100.xxzz, r100.yyww \n"
"add r40._y_w, r101.xxzz, r101.yyww \n"
"ret \n"
"endfunc \n"
// func 2: butterfly. r50 = r50 + r51, r51 = (old r50) - r51. Clobbers r100.
"func 2 \n"
"mov r100, r50 \n"
"add r50, r100, r51 \n"
"sub r51, r100, r51 \n"
"ret \n"
"endfunc \n"
";IFFT4. \n"
// func 3: 4-point inverse butterfly on r60..r63, in place.
// Stage 1 pairs (r60, r62) and (r61, r63); r63 is then multiplied by
// l1.xyxy = (0, 1, 0, 1), i.e. by +i; stage 2 pairs (r60, r61) and (r62, r63).
// Outputs land in the order r60 = X0, r61 = X2, r62 = X1, r63 = X3.
"func 3 \n"
"mov r50, r60 \n"
"mov r51, r62 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r62, r51 \n"
"mov r50, r61 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r61, r50 \n"
"mov r63, r51 \n"
"mov r40, r63 \n"
"mov r41, l1.xyxy \n"
"call 0 \n"
"mov r63, r40 \n"
"mov r50, r60 \n"
"mov r51, r61 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r61, r51 \n"
"mov r50, r62 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r62, r50 \n"
"mov r63, r51 \n"
"ret \n"
"endfunc \n"
// func 20: divide r400..r403 by l3.y = 4.0 (division by zero yields FLT_MAX).
"func 20 \n"
"div_zeroop(fltmax) r400, r400, l3.y \n"
"div_zeroop(fltmax) r401, r401, l3.y \n"
"div_zeroop(fltmax) r402, r402, l3.y \n"
"div_zeroop(fltmax) r403, r403, l3.y \n"
"ret \n"
"endfunc \n"
"end \n";

// AMD IL compute kernel (il_cs_2_0, 64 threads per group): one radix-4
// inverse-FFT pass over the global buffer g[], twiddle first, butterfly
// second.
//
// Per thread:
//   t    = ((groupId & 7) << 6) + tidInGroup          (0..511)
//   base = ((groupId & ~7) << 8) + t                  (2048 entries per 8 groups)
//   load g[base + {0, 512, 1024, 1536}] into r400..r403      (func 5)
//   multiply r401..r403 by (cos a, sin a), a = l10.{y,z,w} * t / 2048 (func 7)
//   4-point inverse butterfly on r400..r403                   (func 3)
//   divide r400..r403 by 4                                    (func 20)
//   store to g[base + {0, 512, 1024, 1536}]                   (func 6)
//
// The twiddles are applied to the inputs, which are still in natural order,
// so l10 holds (0, 2*pi, 4*pi, 6*pi). func 3 then leaves its outputs in the
// order 0, 2, 1, 3, which func 6 undoes by swapping r401 and r402.
// Only the layout is visible here; any change must keep the emitted text valid IL.
const char _ifft2048_fft4_nt_tomo_source_[] = 
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
// Literal pool. l3..l9 hold (log2(v) as integer, v as float); l20..l26 hold
// (n, 1 << n, ~((1 << n) - 1), (1 << n) - 1) for n = 0..6. Only l1, l3, l8,
// l9, l10, l23 and l26 are referenced by the code below.
"; l0 = (0.0f, 1.401298464e-45f, -1.#QNANf, 2.802596929e-45f, ) \n"
"dcl_literal l0, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000002 \n"
"; l1 = (0.0f, 1.0f, -1.0f, 0.7071067691f, ) \n"
"dcl_literal l1, 0x00000000, 0x3F800000, 0xBF800000, 0x3F3504F3 \n"
"; l3 = (2.802596929e-45f, 4.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l3, 0x00000002, 0x40800000, 0x00000000, 0x00000000 \n"
"; l4 = (7.006492322e-45f, 32.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l4, 0x00000005, 0x42000000, 0x00000000, 0x00000000 \n"
"; l5 = (8.407790786e-45f, 64.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l5, 0x00000006, 0x42800000, 0x00000000, 0x00000000 \n"
"; l6 = (1.261168618e-44f, 512.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l6, 0x00000009, 0x44000000, 0x00000000, 0x00000000 \n"
"; l7 = (1.401298464e-44f, 1024.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l7, 0x0000000A, 0x44800000, 0x00000000, 0x00000000 \n"
"; l8 = (1.541428311e-44f, 2048.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l8, 0x0000000B, 0x45000000, 0x00000000, 0x00000000 \n"
"; l9 = (1.121038771e-44f, 256.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l9, 0x00000008, 0x43800000, 0x00000000, 0x00000000 \n"
// l10 = (0, 2*pi, 4*pi, 6*pi): twiddle angle factors in natural order.
"; l10 = (0.0f, 6.283185482f, 12.56637096f, 18.84955597f, ) \n"
"dcl_literal l10, 0x00000000, 0x40C90FDB, 0x41490FDB, 0x4196CBE4 \n"
"; l20 = (0.0f, 1.401298464e-45f, -1.#QNANf, 0.0f, ) \n"
"dcl_literal l20, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000000 \n"
"; l21 = (1.401298464e-45f, 2.802596929e-45f, -1.#QNANf, 1.401298464e-45f, ) \n"
"dcl_literal l21, 0x00000001, 0x00000002, 0xFFFFFFFE, 0x00000001 \n"
"; l22 = (2.802596929e-45f, 5.605193857e-45f, -1.#QNANf, 4.203895393e-45f, ) \n"
"dcl_literal l22, 0x00000002, 0x00000004, 0xFFFFFFFC, 0x00000003 \n"
"; l23 = (4.203895393e-45f, 1.121038771e-44f, -1.#QNANf, 9.809089250e-45f, ) \n"
"dcl_literal l23, 0x00000003, 0x00000008, 0xFFFFFFF8, 0x00000007 \n"
"; l24 = (5.605193857e-45f, 2.242077543e-44f, -1.#QNANf, 2.101947696e-44f, ) \n"
"dcl_literal l24, 0x00000004, 0x00000010, 0xFFFFFFF0, 0x0000000F \n"
"; l25 = (7.006492322e-45f, 4.484155086e-44f, -1.#QNANf, 4.344025239e-44f, ) \n"
"dcl_literal l25, 0x00000005, 0x00000020, 0xFFFFFFE0, 0x0000001F \n"
"; l26 = (8.407790786e-45f, 8.968310172e-44f, -1.#QNANf, 8.828180325e-44f, ) \n"
"dcl_literal l26, 0x00000006, 0x00000040, 0xFFFFFFC0, 0x0000003F \n"
// main: r0.x = t = ((groupId & 7) << 6) + tidInGroup
"and r0.x___, vThreadGrpIdFlat0.x, l23.w \n"
"ishl r0.x___, r0.x, l26.x \n"
"iadd r0.x___, r0.x, vTidInGrpFlat0.x \n"
// r0.y = (groupId & ~7) << 8; r70.x = r0.y + t is the base index into g[].
"and r0._y__, vThreadGrpIdFlat0.x, l23.z \n"
"ishl r0._y__, r0.y, l9.x \n"
"iadd r70.x___, r0.y, r0.x \n"
// Load, then twiddle the inputs with r80 = t / 2048 (broadcast to all lanes).
"call 5 \n"
"itof r80.x___, r0.x \n"
"div_zeroop(fltmax) r80, r80.x, l8.y \n"
"call 7 \n"
// 4-point butterfly through func 3's r60..r63 interface, then scale and store.
"mov r60, r400 \n"
"mov r61, r401 \n"
"mov r62, r402 \n"
"mov r63, r403 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r401, r61 \n"
"mov r402, r62 \n"
"mov r403, r63 \n"
"call 20 \n"
"call 6 \n"
"endmain \n"
// func 7: multiply r401, r402, r403 by (cos, sin) of r80 * l10 lanes y, z, w.
// r400 is not modified (lane x of r110/r120 is never written).
"func 7 \n"
"mul_ieee r105, r80, l10 \n"
"cos_vec r110._yzw, r105 \n"
"sin_vec r120._yzw, r105 \n"
"mov r40, r401 \n"
"mov r41.x_z_, r110.y \n"
"mov r41._y_w, r120.y \n"
"call 0 \n"
"mov r401, r40 \n"
"mov r40, r402 \n"
"mov r41.x_z_, r110.z \n"
"mov r41._y_w, r120.z \n"
"call 0 \n"
"mov r402, r40 \n"
"mov r40, r403 \n"
"mov r41.x_z_, r110.w \n"
"mov r41._y_w, r120.w \n"
"call 0 \n"
"mov r403, r40 \n"
"ret \n"
"endfunc \n"
// func 5: load four values, 512 entries apart, starting at g[r70.x].
"func 5 \n"
"mov r400, g[r70.x+0] \n"
"mov r401, g[r70.x+512] \n"
"mov r402, g[r70.x+1024] \n"
"mov r403, g[r70.x+1536] \n"
"ret \n"
"endfunc \n"
// func 6: store the results; r401 and r402 are swapped to undo the 0, 2, 1, 3
// output order of func 3.
"func 6 \n"
"mov g[r70.x+0], r400 \n"
"mov g[r70.x+512], r402 \n"
"mov g[r70.x+1024], r401 \n"
"mov g[r70.x+1536], r403 \n"
"ret \n"
"endfunc \n"
// func 0: packed complex multiply r40 *= r41. Lanes (x, y) and (z, w) each
// hold one (re, im) pair: re = a*c - b*d, im = a*d + b*c. Clobbers r100, r101.
"func 0 \n"
"mul_ieee r100, r40, r41 \n"
"mul_ieee r101, r40, r41.yxwz \n"
"sub r40.x_z_, r100.xxzz, r100.yyww \n"
"add r40._y_w, r101.xxzz, r101.yyww \n"
"ret \n"
"endfunc \n"
// func 2: butterfly. r50 = r50 + r51, r51 = (old r50) - r51. Clobbers r100.
"func 2 \n"
"mov r100, r50 \n"
"add r50, r100, r51 \n"
"sub r51, r100, r51 \n"
"ret \n"
"endfunc \n"
";IFFT4. \n"
// func 3: 4-point inverse butterfly on r60..r63, in place.
// Stage 1 pairs (r60, r62) and (r61, r63); r63 is then multiplied by
// l1.xyxy = (0, 1, 0, 1), i.e. by +i; stage 2 pairs (r60, r61) and (r62, r63).
// Outputs land in the order r60 = X0, r61 = X2, r62 = X1, r63 = X3.
"func 3 \n"
"mov r50, r60 \n"
"mov r51, r62 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r62, r51 \n"
"mov r50, r61 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r61, r50 \n"
"mov r63, r51 \n"
"mov r40, r63 \n"
"mov r41, l1.xyxy \n"
"call 0 \n"
"mov r63, r40 \n"
"mov r50, r60 \n"
"mov r51, r61 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r61, r51 \n"
"mov r50, r62 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r62, r50 \n"
"mov r63, r51 \n"
"ret \n"
"endfunc \n"
// func 20: divide r400..r403 by l3.y = 4.0 (division by zero yields FLT_MAX).
"func 20 \n"
"div_zeroop(fltmax) r400, r400, l3.y \n"
"div_zeroop(fltmax) r401, r401, l3.y \n"
"div_zeroop(fltmax) r402, r402, l3.y \n"
"div_zeroop(fltmax) r403, r403, l3.y \n"
"ret \n"
"endfunc \n"
"end \n";

    }
}

#endif // _IFFT_IL_SOURCE_H_